*  Preamble
clear all
qui cap log close
qui set more off, perm
qui set varabbrev on, perm
qui set matsize 5000
pause on 

* Macros
global maindir "f:\econometrics\WP41_RatioNormEff\application\"
cd "${maindir}\"
log using Application2025March08.log, replace

use statedata, clear
drop if re_arrests==1  // felony re-arrests removed; 18% lost 
drop if rpp<0 | blk<0
gen one=1
gen eps=one*0.00001
gen age2=(age^2)/100
gen y=dur
gen d=rpp
gen agemale=age*male


local xlist "one age age2 agemale blk male tjail"
local xintlist "one agemale blk male"  // list of x used as interaction term

local xdlist ""
foreach x of local xintlist {
	gen `x'd=`x'*d
	local xdlist = "`xdlist'" + "d`x'"
}
global xlist "one age age2 agemale blk male tjail"
global xdlist  "oned agemaled blkd maled"  // interaction term with D
global xintlist "one agemale blk male" //list of x used as interaction term
global varlist " $xlist $xdlist " 


program weib
version 17.0 // (or version 18.5 for StataNow)
args lnf theta1 theta2
tempvar p M R
quietly generate double `p' = exp(`theta2')
quietly generate double `M' = ($ML_y1*exp(-`theta1'))^`p'
quietly generate double `R' = ln($ML_y1)-`theta1'
quietly replace `lnf' = $ML_y2*(ln(`theta2')+(`theta2'-1)*ln($ML_y1) + `theta1') - ($ML_y1)^(`theta2')*exp(`theta1')
end



//************** Table 1: Data Statistics **************//  
sum y d age blk male tjail 


//************** Table 2: Weibull MLE ******************//  
ml model lf weib (y del = $varlist ) ()
ml maximize
matrix bweb=e(b)


//******* Estimating effects computed at t evaluation points

//*** Estimating Propensity Score for Nonparametrics
qui logit d $xlist
predict p

//gen tpoint=.  // time points evaluated at

gen y0=y if d==0
gen y1=y if d==1
gen p0=p if d==0
gen p1=p if d==1
gen evp=round(p*100)/100   // ps evaluation point for estimating E(Y|D,PS) 


scalar T=20 // total number of t evaluation points
scalar initial=0.6  // starting point of t
scalar incr=0.2      // increment of t
gen tpoint = initial + (_n - 1) * incr if _n<=T
gen mestweb=.   
gen mestlc=. 
gen selc=. 
gen mestec=. 
gen seec=.
gen mest=.


// set up the time point: from 1.4 to 4.4 by 0.2 
forv i=1/20 {
	scalar id=tpoint[`i']
	gen d_i=(y>id)
	
	
//********** Nonparametric Estimnator each t **************//	
gen d0_i=(y0>id)
gen d1_i=(y1>id)

qui lpoly d0_i p0, at(evp) kernel(gaussian) gen(ex0 y0hat)
qui lpoly d1_i p1, at(evp) kernel(gaussian) gen(ex1 y1hat)

gen mest`i'=y1hat/y0hat -1
qui sum mest`i'
replace mest=r(mean) in `i'



//********** Estimator specifying linear cumulative hazard at each t **************//	
mlexp ( (1-d_i)*ln(1-exp(-(({xb: "$varlist "})>eps)*({xb:})+(({xb:})<eps)*eps)) -d_i*abs({xb:})  ), diff technique(nr) iterate(1000)

matrix blc=e(b)
scalar n=e(N) 



//************** Estimator specifying exponential cumulative hazard at each t **************//
mlexp ( d_i*ln(exp(-exp({xb: "$varlist "}))) + (1-d_i)*ln(1-(exp(-exp({xb:}))))  ), diff technique(nr) iterate(1000)

matrix bec=e(b)


// Caculating Effect & SE	
	mata {
	   st_view(y=.,.,tokens("d_i"))
	   st_view(mx=.,.,tokens("$varlist"))
	   st_view(m=.,.,tokens("$xlist"))
	   st_view(md=.,.,tokens("$xdlist"))
	   st_view(mint=.,.,tokens("$xintlist"))
	   st_view(one=.,.,tokens("one "))
	   km=cols(m)
	   kmd=cols(md)
	   n=rows(y)
	   id = st_numscalar("id")
	   bweb0=st_matrix("bweb")
	   bweb=(bweb0[1,km+kmd+1],bweb0[1,2::km+kmd])
	   bmweb=(bweb[1,1::km])'
	   bmdweb=(bweb[1,km+1::km+kmd])'
	   baweb=bweb0[1,km+kmd+2]
	   estw10=exp( -(exp(mint*bmdweb)-one):*exp(m*bmweb)*(id^baweb))- one
       st_matrix("estw10",estw10)
	   
	   blc=st_matrix("blc")
	   bmdlc=(blc[1,km+1::km+kmd])'
	   estlc10=exp(-(mint*bmdlc))-one 
	   scorelc=-(y:*mx):/(one-exp(-mx*blc'))
	   etalc=scorelc*invsym(scorelc'scorelc/n)
	   gglc=exp(-mint*bmdlc)
	   el1lc=J(km,1,0)
	   el2lc=mean(-mint:*gglc)
	   ellc=(el1lc\el2lc')
	   ggmlc=mean(gglc)
	   inflc=gglc-one*(ggmlc)+etalc*ellc
	   vlc=inflc'*inflc/(n^2)
	   st_matrix("vlc", vlc)
	   st_matrix("estlc10",estlc10)
	   
	   bec=st_matrix("bec")
	   bmdec=(bec[1,km+1::km+kmd])'
	   bmec=(bec[1,1::km])'
	   estec10=exp(-exp(m*bmec+mint*bmdec)+exp(m*bmec)) -one
	   sec=-(y:*exp(mx*bec'):*mx):/(one-exp(-exp(mx*bec')))
	   etaec=sec*invsym(sec'*sec/n)
	   ggec=exp( -exp(m*bmec+mint*bmdec)+exp(m*bmec))
	   el1ec=mean(-m:*ggec:*(exp(m*bmec+mint*bmdec)-exp(m*bmec))) 
	   el2ec=mean(-mint:*ggec:*exp(m*bmec+mint*bmdec))
	   elec=(el1ec'\el2ec')
	   ggmec=mean(ggec)
	   infec=ggec-one*(ggmec)+etaec*elec
	   vec=infec'*infec/(n^2)
	   st_matrix("vec", vec)
	   st_matrix("estec10",estec10)
	}
	

getmata estw10, force 
qui sum estw10
replace mestweb=r(mean) in `i'

getmata estlc10, force 
qui sum estlc10
replace mestlc=r(mean)  in `i'
replace selc=sqrt(vlc[1,1])  in `i'

getmata estec10, force 
qui sum estec10 
replace mestec=r(mean)  in `i'
replace seec=sqrt(vec[1,1])  in `i'
disp `i'
drop d_i d0_i d1_i estw10 estlc10 estec10 y0hat ex0 y1hat ex1 mest`i'
}

recode mestweb mestlc mestec selc seec (0=.)

gen mestlc_u=mestlc+1.96*selc 
gen mestlc_l=mestlc-1.96*selc



//************** Figure 1: Estimated Effect ******************//  

twoway (line mestlc tpoint, sort lpattern(dash)) (line mestec tpoint, sort lpattern(vshortdash))  (line mestweb tpoint, sort lpattern(tight_dot)) (line mest tpoint, sort lpattern(solid)), xtitle("t year", size(medlarge)) title("PS-Kernel & Linear CumHazard & Expo CumHazard & Weibull",  size(medium)) legend(label(1 "L. CumHazard") label(2 "Expo. CumHazard") label(3 "Weibull") label(4 "Nonpara")  size(small)) xscale(r(0.6 4.40)) xlabel(0.6(.4)4.40) yscale(r(0.1 0.8)) ylabel(0.1(0.1)0.8)  graphregion(margin(small)) xsize(2.4) ysize(2) saving("Figure11.gph", replace) 


twoway (line mestlc_u tpoint, sort lpattern(dash) lcolor(black)) (line mestlc tpoint , sort lpattern(solid) lcolor(black))  (line mestlc_l tpoint , sort lpattern(dash) lcolor(black)) , xtitle("t year", size(medlarge)) title("Linear CumHazard with 95% CI",  size(medium)) legend(label(1 "95% CI") label(2 "L. CumHazard")  size(small)) xscale(r(0.6 4.40)) xlabel(0.6(.4)4.40) yscale(r(0.1 0.8)) ylabel(0.1(0.1)0.8)  graphregion(margin(small)) xsize(2.4) ysize(2) saving("Figure12.gph", replace) 	
	
graph combine "Figure11.gph"	"Figure12.gph", xsize(4) ysize(2) saving("Figure1.gph", replace) 


log close	

scalar T=21 // total number of t evaluation points
scalar initial=0.3  // starting point of t
scalar incr=0.2      // increment of t
gen tpoint2 = initial + (_n - 1) * incr if _n<=T
//************* Figure 2: Nonparametric Estimates 3d graph ****************//
forv i=1/21{
	
    scalar id=tpoint2[`i']
	gen d_i=(y>id)
	
	
//********** Nonparametric Estimnator each t **************//	
gen d0_i=(y0>id)
gen d1_i=(y1>id)

qui lpoly d0_i p0, at(evp) kernel(gaussian) gen(ex0 y0hat)
qui lpoly d1_i p1, at(evp) kernel(gaussian) gen(ex1 y1hat)
gen mest`i'=y1hat/y0hat -1

drop d_i d0_i d1_i ex0 y0hat ex1 y1hat
}



bysort evp: gen napp=_n 
export excel evp mest1 mest2 mest3 mest4 mest5 mest6 mest7 mest8 mest9 mest10 mest11 mest12 mest13 mest14 mest15 mest16 mest17 mest18 mest19 mest20 mest21 if napp==1 & evp>0.23 & evp<0.71 using "nonpara_3d_results_re.xls", firstrow(variables) replace	
//graph is generated in Excel	


	
	
	
	
	
	
	
	
	
	
	
	